/*
In this exercise, you are to measure the size and cost of accessing a TLB.
The idea is based on work by Saavedra-Barrera, who developed a simple but
beautiful method to measure numerous aspects of cache hierarchies, all
with a very simple user-level program. Read his work for more details.

The basic idea is to access some number of pages within a large data structure
(e.g., an array) and to time those accesses. For example, let’s say the TLB
size of a machine happens to be 4 (which would be very small, but useful for
the purposes of this discussion). If you write a program that touches 4 or fewer
pages, each access should be a TLB hit, and thus relatively fast. However, once
you touch 5 pages or more, repeatedly in a loop, each access will suddenly jump
in cost, to that of a TLB miss.
 */

#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#define __GNU_SOURCE
#define __USE_GNU
#include <sched.h>

/* CPU index placed in the affinity mask that main() hands to
   sched_setaffinity(). */
static const int tested_cpu = 0;
/* Passed as the group_fd argument of perf_event_open in measure(); per
   perf_event_open(2), -1 makes the new event its own group leader. */
static const int group_fd_leader = -1;

/*
 * Record layout used to size the buffer that measure() passes to read(2)
 * on a perf event file descriptor.
 *
 * Which fields the kernel actually fills in depends on attr.read_format of
 * the opened event: per perf_event_open(2), this exact field order is what a
 * non-group event returns when it requests
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING |
 * PERF_FORMAT_ID. With fewer flags the record is shorter and only `value`
 * keeps its offset.
 */
struct read_format
{
  uint64_t value;        /* counter value */
  uint64_t time_enabled; /* present only with PERF_FORMAT_TOTAL_TIME_ENABLED */
  uint64_t time_running; /* present only with PERF_FORMAT_TOTAL_TIME_RUNNING */
  uint64_t id;           /* present only with PERF_FORMAT_ID */
};

/*
 * The (pid, cpu) pair that measure() forwards as the second and third
 * arguments of the perf_event_open system call, i.e. which process and
 * which CPU the counter is attached to.
 */
struct cpu_process_monitor_conf
{
  pid_t pid; /* target process; the helpers below use 0 and -1 */
  int cpu;   /* target CPU index; the helpers below use -1 or a real index */
};

struct cpu_process_monitor_conf
get_conf_monitor_each_process_on_cpu(int cpu)
{
  return (struct cpu_process_monitor_conf){-1, cpu};
}

struct cpu_process_monitor_conf
get_conf_monitor_calling_process(void)
{
  return (struct cpu_process_monitor_conf){0, -1};
}

/*
 * Build the perf_event_attr describing a software page-fault counter.
 *
 * The counter starts enabled (disabled == 0) and counts user-space events
 * (exclude_user == 0). The requested read_format is
 * TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING | ID so that the record returned
 * by read(2) has exactly the value / time_enabled / time_running / id layout
 * of struct read_format declared in this file; requesting PERF_FORMAT_ID
 * alone would yield a shorter record whose id lands where time_enabled is
 * expected.
 *
 * Returns the fully initialised attribute by value; all fields not named
 * here are zero.
 */
struct perf_event_attr
get_perf_event_attr_for_page_fault(void)
{
  struct perf_event_attr attr = {0};

  attr.type = PERF_TYPE_SOFTWARE;
  attr.size = sizeof attr;
  attr.config = PERF_COUNT_SW_PAGE_FAULTS;
  attr.disabled = 0;
  attr.exclude_user = 0;
  attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED
                     | PERF_FORMAT_TOTAL_TIME_RUNNING
                     | PERF_FORMAT_ID;

  return attr;
}

/*
 * Open a page-fault perf counter for the calling process and read it once.
 *
 * Prints "something inside" when read(2) returns data and reports a failed
 * read through perror(). If perf_event_open fails, prints the error and
 * terminates the process with EXIT_FAILURE.
 */
void
measure(void)
{
  /* Alternative target: get_conf_monitor_each_process_on_cpu(tested_cpu). */
  struct cpu_process_monitor_conf conf =
    get_conf_monitor_calling_process();
  struct perf_event_attr attr = get_perf_event_attr_for_page_fault();

  /*
   * Assign first, compare afterwards. Writing
   * `fd = syscall(...) == -1` stores the result of the comparison (0 or 1)
   * in fd, so a successful open would leave fd == 0 and the read()/close()
   * below would operate on stdin while the real descriptor leaks.
   * flags is an unsigned long in the kernel ABI, hence 0UL through varargs.
   */
  int fd = (int) syscall(SYS_perf_event_open,
                         &attr,
                         conf.pid, conf.cpu,
                         group_fd_leader, 0UL);
  if (fd == -1)
    {
      perror("perf_event_open");
      exit(EXIT_FAILURE);
    }

  /* Raw bytes; large enough for the biggest record struct read_format
     describes. */
  unsigned char buffer[sizeof(struct read_format)];
  ssize_t got = read(fd, buffer, sizeof buffer);
  if (got > 0)
    {
      puts("something inside");
    }
  else if (got == -1)
    {
      perror("read");
    }
  close(fd);
}

/*
 * A pipe(2) descriptor pair together with a flag recording whether it was
 * created successfully.
 */
struct pipe_t
{
  int pipefd[2]; /* filled by pipe(): main() reads from [0], writes to [1] */
  bool valid;    /* set to true in main() only when pipe() returned 0 */
};

/*
 * Terminate the program when `valid` is false: print `msg` followed by the
 * description of the current errno via perror(), then exit(EXIT_FAILURE).
 * Does nothing when `valid` is true.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as one
 * statement (safe as the unbraced branch of an if/else), and `valid` is
 * parenthesised so a compound argument such as `a || b` is negated as a
 * whole rather than as `!a || b`.
 */
#define on_error(valid, msg) \
  do { \
    if (!(valid)) { \
      perror(msg); \
      exit(EXIT_FAILURE); \
    } \
  } while (0)

/*
 * Signal handler installed by the child for SIGTERM: announce the reason
 * on stdout and terminate the child with EXIT_SUCCESS.
 *
 * Only async-signal-safe calls are used. puts() and exit() are not on the
 * safe list (the handler may interrupt another stdio call holding the
 * stream lock, and exit() runs atexit handlers and flushes stdio), so the
 * message goes out with write() and the process ends with _exit().
 *
 * signum: number of the delivered signal (unused).
 */
static void
sig_handler(int signum)
{
  static const char msg[] = "child terminated because of parent\n";

  (void) signum;
  /* Best effort: nothing useful can be done here if the write fails. */
  ssize_t written = write(STDOUT_FILENO, msg, sizeof msg - 1);
  (void) written;
  _exit(EXIT_SUCCESS);
}

/*
 * Touch a freshly allocated 10-page int array at half-page intervals and
 * print, for every touched element, the clock() ticks spent on the store.
 *
 * page_size: size of one memory page in bytes. Values that are not
 *            positive, or too small to give a non-zero half-page stride,
 *            make the function return without output.
 *
 * If the allocation fails the error is reported with perror() and the
 * function returns.
 */
static void reactor(long page_size)
{
  const size_t area_pages = 10;

  if (page_size <= 0)
    return;

  const size_t ints_per_page = (size_t) page_size / sizeof(int);
  const size_t stride = ints_per_page / 2;
  /* A zero stride would never advance the loop below. */
  if (stride == 0)
    return;
  /* Refuse sizes whose byte count would not fit in size_t. */
  if (ints_per_page > SIZE_MAX / sizeof(int) / area_pages)
    return;

  const size_t count = ints_per_page * area_pages;
  /*
   * count is a number of ints, so the allocation must be scaled by the
   * element size; allocating only `count` bytes lets the loop write far
   * past the end of the block. malloc (not calloc) is kept on purpose so
   * the pages are not touched before the timed stores.
   */
  int *area = malloc(count * sizeof *area);
  if (area == NULL)
    {
      perror("malloc");
      return;
    }

  for (size_t i = 0; i < count; i += stride)
    {
      clock_t start = clock();
      area[i] = rand() % 100 + 20;
      clock_t end = clock();
      printf("clocks[%zu]: %ld\n", i, (long) (end - start));
    }
  free(area);
}

/*
 * Program entry point.
 *
 * Restricts the process to tested_cpu, prints its pid, current CPU and the
 * page size, then forks. The child sends "hello" through a pipe and runs
 * measure(); the parent prints what it received, runs reactor() on the
 * page size and waits for the child.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if pipe() or fork() fails.
 */
int
main(void)
{
  const pid_t pid = getpid();
  cpu_set_t cpu_set = {0};
  CPU_SET(tested_cpu, &cpu_set);
  /* Best effort: report a failure but keep going unpinned. */
  if (sched_setaffinity(pid, sizeof cpu_set, &cpu_set) == -1)
    perror("sched_setaffinity");

  /* pid_t is not guaranteed to be long; cast to match %ld. */
  printf("current pid: %ld\n", (long) pid);
  printf("on cpu: %d\n", sched_getcpu());

  long page_size = sysconf(_SC_PAGESIZE);
  printf("pagesize: %ld\n", page_size);

  struct pipe_t a_pipe = {0};
  a_pipe.valid = (pipe(a_pipe.pipefd) == 0);

  on_error(a_pipe.valid, "pipe");

  /* Flush before forking so buffered output is not inherited by the child
     and printed twice when stdout is not a terminal. */
  fflush(stdout);

  pid_t fpid = fork();
  if (fpid == -1)
    {
      perror("fork");
      exit(EXIT_FAILURE);
    }

  if (fpid == 0)
    {
      /* Child: ask to be sent SIGTERM when the parent dies. */
      signal(SIGTERM, sig_handler);
      prctl(PR_SET_PDEATHSIG, SIGTERM);
      close(a_pipe.pipefd[0]);
      if (write(a_pipe.pipefd[1], "hello", 5) != 5)
        perror("write");
      close(a_pipe.pipefd[1]);
      measure();
      exit(EXIT_SUCCESS);
    }

  /* Parent: only reads from the pipe. */
  close(a_pipe.pipefd[1]);
  char buffer[10];
  /* Leave one byte for the terminator; reading sizeof buffer bytes would
     make buffer[ss] write one past the end when the read fills it. */
  ssize_t ss = read(a_pipe.pipefd[0], buffer, sizeof buffer - 1);
  if (ss > 0)
    {
      buffer[ss] = '\0';
      puts(buffer);
    }
  close(a_pipe.pipefd[0]);
  reactor(page_size);
  puts("reactor finished");
  wait(NULL);
  return 0;
}
